Package org.terrier.compression

Source Code of org.terrier.compression.TestCompressedBitFiles$TestCompressedBitFiles_BitByteStreams

/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org/
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is TestCompressedBitFiles.java
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
*   Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original contributor)
*/
package org.terrier.compression;

import static org.junit.Assert.assertEquals;
import gnu.trove.TByteArrayList;
import gnu.trove.TIntArrayList;
import gnu.trove.TLongArrayList;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.IOUtils.NullOutputStream;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;
import org.terrier.structures.BitFilePosition;
import org.terrier.structures.BitIndexPointer;
import org.terrier.structures.DirectInvertedDocidOnlyOuptutStream;
import org.terrier.structures.FilePosition;
import org.terrier.structures.postings.BasicPostingImpl;
import org.terrier.structures.postings.IterablePosting;
import org.terrier.structures.postings.IterablePostingImpl;
import org.terrier.structures.postings.Posting;
import org.terrier.structures.postings.WritablePosting;
import org.terrier.utility.Files;
import org.terrier.utility.io.RandomDataInputMemory;


@RunWith(Suite.class)
@SuiteClasses({
  TestCompressedBitFiles.TestCompressedBitFiles_Streams.class,
  TestCompressedBitFiles.TestCompressedBitFiles_BitByteStreams.class,
  //TestCompressedBitFiles.TestCompressedBitFiles_BitFile.class,
  TestCompressedBitFiles.TestCompressedBitFiles_Specific1.class,
  TestCompressedBitFiles.TestCompressedBitFiles_Specific3.class, 
  TestCompressedBitFiles.TestCompressedBitFiles_BitFileBuffered.class,
  TestCompressedBitFiles.TestCompressedBitFiles_BitFileBufferedSmallBuffer.class,
  TestCompressedBitFiles.TestCompressedBitFiles_BitFileInMemory.class,
  TestCompressedBitFiles.TestCompressedBitFiles_BitFileInMemoryLarge.class,
  //TestCompressedBitFiles.TestCompressedBitFiles_BitFile_RandomDataInputMemory.class,
  TestCompressedBitFiles.TestCompressedBitFiles_BitFileBuffered_RandomDataInputMemory.class
})

/** Ensures that Bit implementations perform as expected */
public class TestCompressedBitFiles  {


  /** An instance of IterablePostings that works with a passed array of ids
   * @author Craig Macdonald */
  static class ArrayOfIdsIterablePosting extends IterablePostingImpl {

    int[] node_links;
    int indice = -1;
   
    public ArrayOfIdsIterablePosting(int[] _ids)
    {
      node_links = _ids;
    }
   
    public int next() throws IOException {
      if (indice == node_links.length -1)
        return EOL;
      return node_links[++indice];
    }
   
    /** {@inheritDoc} */
    public boolean endOfPostings() {
      return (indice == node_links.length -1);
    }

    public WritablePosting asWritablePosting() {
      return new BasicPostingImpl(node_links[indice], 0);
    }
   
    public int getId()
    {
      return node_links[indice];
    }

   
    public int getDocumentLength() {return 0;}
    public int getFrequency() {return 0;}
    public void setId(int id) {}
    public void close() throws IOException {}

  }
 
  /** Tests come specific cases */
  public static class TestCompressedBitFiles_Specific2
  {
    int initial_bitoffset = 0;
    ArrayList<int[]> IDS = new ArrayList<int[]>();
    ArrayList<BitFilePosition> startOffsets = new ArrayList<BitFilePosition>();
    byte[] bytes;
    long byteOffset;
    byte bitOffset;
   
    @Before public void writeOut() throws IOException {
      TIntArrayList ids = new TIntArrayList();
      BufferedReader br = Files.openFileReader("/users/craigm/src/tr3/linksList");
      String line = null;
     
     
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      NullOutputStream nos = new NullOutputStream();
      BitOutputStream bo_null = new BitOutputStream(nos);
      DirectInvertedDocidOnlyOuptutStream dios = new DirectInvertedDocidOnlyOuptutStream(bo_null);
      BitOutputStream bo = new BitOutputStream(baos);
      if (initial_bitoffset > 0)
        bo.writeBinary(initial_bitoffset, 0);
      int docid = 0;
      while((line = br.readLine())!= null)
      {
        String[] parts = line.split("\\s+");
        for(String p : parts)
          ids.add(Integer.parseInt(p));
        int[] _tmp = ids.toNativeArray();
        byteOffset = bo.getByteOffset();
        bitOffset = bo.getBitOffset();
        BitFilePosition bp = new FilePosition(byteOffset, bitOffset);
        startOffsets.add(bp);
        //System.err.println(_tmp.length + "@{"+byteOffset+","+bitOffset+"}");
       
        List<Posting> postingList = new ArrayList<Posting>();
        IterablePosting ip = new ArrayOfIdsIterablePosting(_tmp);
        while(ip.next() != IterablePosting.EOL)
        {
          postingList.add(ip.asWritablePosting());
        }
        BitIndexPointer diosPointer = dios.writePostings(postingList.iterator());
       
        IDS.add(_tmp);
        ids.clear();
        int previous = -1;
        for(int i : _tmp)
        {
          bo.writeGamma(i - previous);
          previous = i;
        }
       
        assertEquals(byteOffset, diosPointer.getOffset());
        assertEquals(bitOffset, diosPointer.getOffsetBits());
        assertEquals(_tmp.length, diosPointer.getNumberOfEntries());
       
        //System.err.println("startoffset="+ bp.toString() + " dios_pointer="+ diosPointer.toString());
       
        docid++;
      }   
      bo.close();
      bytes = baos.toByteArray();
    }
   
    @Test public void testBitInputStream() throws IOException
    {
      testBitIn(new BitInputStream(new ByteArrayInputStream(bytes)));
    }
   
//    @Test public void testBitFileBuffered() throws IOException
//    {
//      testBitIn(new BitFileBuffered(new RandomDataInputMemory(bytes)).readReset(0l, (byte)0));
//    }
   
    protected void testBitIn(BitIn bi) throws IOException
    {
      if (initial_bitoffset > 0)
        bi.skipBits(initial_bitoffset);     
      for(int i=0;i<IDS.size();i++)
      {
        int[] postings = IDS.get(i);
        BitFilePosition pos = startOffsets.get(i);
        assertEquals(pos.getOffset(), bi.getByteOffset());
        assertEquals(pos.getOffsetBits(), bi.getBitOffset());
        int id = -1;
        for(int j=0;j<postings.length;j++)
        {
          int target = postings[j];
          id = bi.readGamma() + id;
          assertEquals("At index "+i,target, id);
        }
      }
    }
   
  }
 
  public static class TestCompressedBitFiles_Specific1
  {
    //int initial_bitoffset = 1;
    int initial_bitoffset = 4;
    int[] IDS = new int[]{
      //170071, 170252, 171338, 171694, 171869, 172403, 173109, 187999, 188632, 195940, 196297, 197560, 207292, 207645, 211041, 212136, 215169, 215714, 311055, 369393, 403591, 434066, 434242, 434419, 434593, 434773, 434947, 435125, 435305, 435483, 435661, 435749, 436005, 436176, 436359, 436561, 436704, 436885, 437191, 437250, 8369819, 8548046,
      1255, 1431, 1606, 1781, 1952, 2167, 2346, 2527, 2706, 2884, 3068, 7432, 8527, 10021, 14986, 277146, 582195, 582714, 879259, 7847927, 7848266,
    };
    byte[] bytes;
    long byteOffset;
    byte bitOffset;
    @Before public void writeOut() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      BitOutputStream bo = new BitOutputStream(baos);
      bo.writeBinary(initial_bitoffset, 0);
      int previous = -1;
      for(int i : IDS)
      {
        bo.writeGamma(i - previous);
        previous = i;
      }
      byteOffset = bo.getByteOffset();
      bitOffset = bo.getBitOffset();
      System.err.println(IDS.length + "@{"+byteOffset+","+bitOffset+"}");
      //bo.writeUnary(10);
      bo.close();
      bytes = baos.toByteArray();
    }

   
    protected void testBitIn(BitIn bi) throws IOException
    {
      int id=-1;
      bi.skipBits(initial_bitoffset);
      for(int i : IDS)
      {
        id = bi.readGamma() + id;
        assertEquals(i, id);
      }
      assertEquals(byteOffset, bi.getByteOffset());
      assertEquals(bitOffset, bi.getBitOffset());
    }
   
    @Test public void testBitInputStream() throws IOException
    {
      testBitIn(new BitInputStream(new ByteArrayInputStream(bytes)));
    }
   
    @Test public void testBitFileBuffered() throws IOException
    {
      testBitIn(new BitFileBuffered(new RandomDataInputMemory(bytes)).readReset(0l, (byte)0));
    }
  }
 
 
  public static abstract class TestCompressedBitFiles_Basic
  {
    final int[] testNumbers = new int[]{1, 20, 2, 40, 30, 2, 9, 1, 100, 200, 40};
   
    public TestCompressedBitFiles_Basic(){}
   
    protected abstract BitOut getBitOut() throws Exception;
    protected abstract BitIn getBitIn() throws Exception;
   
    @Test public void testBit() throws Exception
    {
      BitOut out = getBitOut();
     
      FilePosition [] startOffsets = new FilePosition[testNumbers.length];
     
      for(int i=0;i<testNumbers.length;i++)
      {
        startOffsets[i] = new FilePosition(out.getByteOffset(), out.getBitOffset());
        if (i %2 == 0)
          out.writeGamma(testNumbers[i]);
        else
          out.writeUnary(testNumbers[i]);
        //System.err.println(i+ " wrote " + testNumbers[i] + " started @" + startOffsets[i] + " finished @{" + out.getByteOffset()  + "," + out.getBitOffset() + "}" );
      }
      //System.err.println("For "+testNumbers.length+"numbers, file length is {"  + out.getByteOffset()  + "," + out.getBitOffset() + "}" );
      out.close();
     
      BitIn in = getBitIn();
      for(int i=0;i<testNumbers.length;i++)
      {
        //System.err.println("i="+i + " start offset is " + in.getByteOffset() + "," + in.getBitOffset());
        assertEquals(startOffsets[i].getOffset(), in.getByteOffset());
        assertEquals(startOffsets[i].getOffsetBits(), in.getBitOffset());
        int number = (i %2 == 0) ? in.readGamma() : in.readUnary();
        assertEquals(testNumbers[i], number);
        //System.err.println("i="+ i + " end offset is " + in.getByteOffset() + "," + in.getBitOffset());
      }
      in.close();
      //System.err.println("compressed form has size " + baos.size() + " bytes");
      in = getBitIn();
     
      in.skipBytes(startOffsets[2].getOffset());
      in.skipBits(startOffsets[2].getOffsetBits());
      assertEquals(startOffsets[2].getOffset(), in.getByteOffset());
      assertEquals(startOffsets[2].getOffsetBits(), in.getBitOffset());
      for(int i=2;i<testNumbers.length;i++)
      {
        //System.err.println("i="+i + " start offset is " + in.getByteOffset() + "," + in.getBitOffset());
        assertEquals(startOffsets[i].getOffset(), in.getByteOffset());
        assertEquals(startOffsets[i].getOffsetBits(), in.getBitOffset());
        int number = (i %2 == 0) ? in.readGamma() : in.readUnary();
        assertEquals(testNumbers[i], number);
        //System.err.println("i="+ i + " end offset is " + in.getByteOffset() + "," + in.getBitOffset());
      }
      in.close();
    }
  }
 
  public static class TestCompressedBitFiles_Streams extends TestCompressedBitFiles_Basic
  {
    public TestCompressedBitFiles_Streams(){}
    ByteArrayOutputStream baos;
    protected BitOut getBitOut() throws Exception
    {
      baos = new ByteArrayOutputStream();
      return new BitOutputStream(baos);
    }
   
    protected BitIn getBitIn() throws Exception
    {
      return new BitInputStream(new ByteArrayInputStream(baos.toByteArray()));
    }
  }
 
  public static class TestCompressedBitFiles_BitByteStreams extends TestCompressedBitFiles_Basic
  {
    public TestCompressedBitFiles_BitByteStreams(){}
    ByteArrayOutputStream baos;
    protected BitOut getBitOut() throws Exception
    {
      baos = new ByteArrayOutputStream();
      return new BitByteOutputStream(baos);
    }
   
    protected BitIn getBitIn() throws Exception
    {
      return new BitInputStream(new ByteArrayInputStream(baos.toByteArray()));
    }
  }
 
  public static abstract class TestCompressedBitFiles_OnFile extends TestCompressedBitFiles_Basic
  {
    @Rule
      public TemporaryFolder tmpfolder = new TemporaryFolder();

    String filename = null;
    public TestCompressedBitFiles_OnFile() {}
   
    protected BitOut getBitOut() throws Exception
    {
      return new BitOutputStream(filename = tmpfolder.newFile("test.bf").toString());
    }
   
  }
 
  public static class TestCompressedBitFiles_BitFile extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFile(){}
   
    protected BitOut getBitOut() throws Exception
    {
      return new BitOutputStream(filename = tmpfolder.newFile("test.bf").toString());
    }
   
    @SuppressWarnings("deprecation")
    protected BitIn getBitIn() throws Exception
    {
      return new BitFile(filename, "r").readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }       
  }
 
  public static class TestCompressedBitFiles_BitFileBuffered extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFileBuffered(){}
   
    protected BitOut getBitOut() throws Exception
    {
      return new BitOutputStream(filename = tmpfolder.newFile("test.bf").toString());
    }
   
    protected BitIn getBitIn() throws Exception
    {
      return new BitFileBuffered(filename).readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }
  }
 
  public static class TestCompressedBitFiles_BitFileBufferedSmallBuffer extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFileBufferedSmallBuffer(){}
   
    protected BitOut getBitOut() throws Exception
    {
      return new BitOutputStream(filename = tmpfolder.newFile("test.bf").toString());
    }
   
    protected BitIn getBitIn() throws Exception
    {
      return new BitFileBuffered(new File(filename), 10).readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }
  }
 
  public static class TestCompressedBitFiles_BitFileInMemory extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFileInMemory(){}
       
    protected BitIn getBitIn() throws Exception
    {
      return new BitFileInMemory(filename).readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }
  }

  public static class TestCompressedBitFiles_BitFileInMemoryLarge extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFileInMemoryLarge(){}
       
    protected BitIn getBitIn() throws Exception
    {
      return new BitFileInMemoryLarge(filename).readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }
  }
 
  public static class TestCompressedBitFiles_BitFile_RandomDataInputMemory extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFile_RandomDataInputMemory(){}
       
    @SuppressWarnings("deprecation")
    protected BitIn getBitIn() throws Exception
    {
      return new BitFile(new RandomDataInputMemory(filename)).readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }
  }
 
  public static class TestCompressedBitFiles_BitFileBuffered_RandomDataInputMemory extends TestCompressedBitFiles_OnFile
  {
    public TestCompressedBitFiles_BitFileBuffered_RandomDataInputMemory(){}
       
    protected BitIn getBitIn() throws Exception
    {
      return new BitFileBuffered(new RandomDataInputMemory(filename)).readReset((long)0, (byte)0, new File(filename).length()-1, (byte)7);
    }
  }
 
  public static class TestCompressedBitFiles_Specific3
  {
    //int initial_bitoffset = 1;
    int initial_bitoffset = 0;
    int[][] IDS = new int[][]{
      new int[]{100,200,300,400}, new int[]{ 0, 1, 2, 4, 8}, new int[]{ 0, 8, 10}
    };
    byte[] bytes;
    TLongArrayList byteOffsets = new TLongArrayList();
    TByteArrayList bitOffsets = new TByteArrayList();

    @Before public void writeOut() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      BitOutputStream bo = new BitOutputStream(baos);
      bo.writeBinary(initial_bitoffset, 0);
      for(int[] ids: IDS)
      {
        int previous = -1;
        byteOffsets.add(bo.getByteOffset());
        bitOffsets.add(bo.getBitOffset());
        //long byteOffset = bo.getByteOffset();
        //byte bitOffset = bo.getBitOffset();
        //System.err.println(ids.length + "@{"+byteOffset+","+bitOffset+"}");
        for(int i : ids)
        {
          bo.writeGamma(i - previous);
          previous = i;
        }
      }     
      bo.close();
      bytes = baos.toByteArray();
    }

   
    protected void testBitIn(BitIn bi) throws IOException
    {
      int index = 0;
      bi.skipBits(initial_bitoffset);
      for(int[] ids : IDS)
      {
        assertEquals(byteOffsets.get(index), bi.getByteOffset());
        assertEquals(bitOffsets.get(index), bi.getBitOffset());
        int id=-1;
        for (int i : ids)
        {
          id = bi.readGamma() + id;
          assertEquals(i, id);
        }
        index++;
      }     
    }
   
    protected void testBitInSkip(int offset, BitIn bi) throws IOException
    {
      bi.skipBits(initial_bitoffset);
     
      bi.skipBytes(byteOffsets.get(offset));
      bi.skipBits(bitOffsets.get(offset));
     
      for(int index=offset;index<IDS.length;index++)
      {
        int[] ids = IDS[index];
        assertEquals(byteOffsets.get(index), bi.getByteOffset());
        assertEquals(bitOffsets.get(index), bi.getBitOffset());
        int id=-1;
        for (int i : ids)
        {
          id = bi.readGamma() + id;
          assertEquals(i, id);
        }
      }     
    }
   
   
    @Test public void testBitInputStream() throws IOException
    {
      testBitIn(new BitInputStream(new ByteArrayInputStream(bytes)));
      testBitInSkip(1, new BitInputStream(new ByteArrayInputStream(bytes)));
      testBitInSkip(2, new BitInputStream(new ByteArrayInputStream(bytes)));
    }
   
    @Test public void testBitFileBuffered() throws IOException
    {
      testBitIn(new BitFileBuffered(new RandomDataInputMemory(bytes)).readReset(0l, (byte)0));
      testBitInSkip(1, new BitFileBuffered(new RandomDataInputMemory(bytes)).readReset(0l, (byte)0));
      testBitInSkip(2, new BitFileBuffered(new RandomDataInputMemory(bytes)).readReset(0l, (byte)0));
    }
  }
 
}
TOP

Related Classes of org.terrier.compression.TestCompressedBitFiles$TestCompressedBitFiles_BitByteStreams

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.